import os
import shutil
import random

from tqdm import tqdm

# Root folder of the dataset; it must contain 'img_dir' (images) and
# 'ann_dir' (PNG label masks named after the images).
Dataset_Path = 'C:/Users/liyuhai/Desktop/20250803_yanbu_dataset'


def split_file_names(file_names, test_frac=0.2, seed=123):
    """Split file names into a training list and a validation list.

    The names are sorted before shuffling because ``os.listdir`` returns
    entries in arbitrary order; without the sort the fixed seed would not
    give the same split on every machine.

    Args:
        file_names: Iterable of file names to split.
        test_frac: Fraction of the files assigned to the validation set.
        seed: Seed of the private random generator used for shuffling.

    Returns:
        A ``(train_files, val_files)`` tuple of lists.
    """
    shuffled = sorted(file_names)
    # A private generator leaves the global ``random`` state untouched.
    random.Random(seed).shuffle(shuffled)
    val_number = int(len(shuffled) * test_frac)
    return shuffled[val_number:], shuffled[:val_number]


def label_name_for(image_name):
    """Return the label file name (``<stem>.png``) for an image file name.

    Only the last extension is replaced, so ``a.b.jpg`` maps to ``a.b.png``.
    """
    return os.path.splitext(image_name)[0] + '.png'


def _list_files(directory):
    """Return the names of the regular files directly inside *directory*."""
    return [
        name for name in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, name))
    ]


def _transfer(file_names, src_dir, dst_dir, transfer):
    """Apply *transfer* (copy or move) to each named file, src -> dst."""
    os.makedirs(dst_dir, exist_ok=True)
    for name in tqdm(file_names):
        transfer(os.path.join(src_dir, name), os.path.join(dst_dir, name))


def split_dataset(dataset_path, test_frac=0.2, seed=123):
    """Split a dataset into ``train`` and ``val`` sub-folders.

    Images are copied into ``img_dir/train`` and ``img_dir/val`` (the
    originals stay in ``img_dir``); the matching labels are moved into
    ``ann_dir/train`` and ``ann_dir/val``.

    Args:
        dataset_path: Folder containing ``img_dir`` and ``ann_dir``.
        test_frac: Fraction of the images assigned to the validation set.
        seed: Random seed that makes the split reproducible.

    Raises:
        FileNotFoundError: If an image has no label file in ``ann_dir``.
            Raised before any file is copied or moved.
    """
    img_dir = os.path.join(dataset_path, 'img_dir')
    ann_dir = os.path.join(dataset_path, 'ann_dir')

    print(len(os.listdir(img_dir)))
    print(len(os.listdir(ann_dir)))

    # Only regular files are images; this skips 'train'/'val' sub-folders
    # that already exist inside img_dir.
    img_paths = _list_files(img_dir)
    train_files, val_files = split_file_names(img_paths, test_frac, seed)

    print('数据集文件总数', len(img_paths))
    print('训练集文件个数', len(train_files))
    print('测试集文件个数', len(val_files))

    # Check every label up front so a missing one cannot leave the dataset
    # half-split.
    missing = [
        name for name in img_paths
        if not os.path.isfile(os.path.join(ann_dir, label_name_for(name)))
    ]
    if missing:
        raise FileNotFoundError(
            'No label file in %s for image(s): %s'
            % (ann_dir, ', '.join(sorted(missing)))
        )

    # Copy the images into img_dir/train and img_dir/val.
    _transfer(train_files, img_dir, os.path.join(img_dir, 'train'), shutil.copy)
    _transfer(val_files, img_dir, os.path.join(img_dir, 'val'), shutil.copy)

    # Move the labels into ann_dir/train and ann_dir/val.
    train_labels = [label_name_for(name) for name in train_files]
    val_labels = [label_name_for(name) for name in val_files]
    _transfer(train_labels, ann_dir, os.path.join(ann_dir, 'train'), shutil.move)
    _transfer(val_labels, ann_dir, os.path.join(ann_dir, 'val'), shutil.move)


if __name__ == '__main__':
    split_dataset(Dataset_Path)
    # The original script finished with its working directory set to the
    # dataset's parent folder; keep that for any code that runs afterwards.
    os.chdir(os.path.join(Dataset_Path, os.pardir))


